About experiment

Effect of non-tuberculous Mycobacteria on the protective efficacy of BCG

Description of the data

Tuberculosis (TB) continues to increase worldwide despite vigorous attempts to control it. Bacillus Calmette-Guerin (BCG) is the only licensed vaccine currently available for protection against TB, however, its efficacy is highly variable between countries. It has been hypothesized that BCG’s variable protection is due, among others, to immunological interference by environmental, non-tuberculous mycobacteria (NTM). However, a definitive mechanism has not been identified so far. Considering the foregoing, We developed a murine model closely resembling the natural history of human exposure to different mycobacterial species, including: 1) BCG vaccination at an early age; 2) exposure to viable NTMs (Mycobacterium avium subsp. avium) via the oral route and 3) maintaining continuous NTM exposure even after TB infection, as occurs in endemic places.

The presented data has six groups:

  1. Saline control: Negative control + Mycobacterium tuberculosis (Mtb) infection
  2. NTM 1X10^5CFU/mL: NTM 1X10^5CFU/mL via drinking water + Mtb infection
  3. BCG: BCG vaccination without NTM + Mtb infection 5: BCG + NTM 1X10^5CFU/mL: BCG vaccination + NTM 1X10^5 CFU/mL via drinking water + Mtb infection

About the technique

We have performed 10x spatial visium transciptomics on formaldehyde-fixed paraffin embedded (FFPE) lung tissues at day 120 post Mtb infection. We have seen lymphoid follicles in lungs of mice vaccinated with BCG, exposed with high concentration of NTM and challenged with Mtb. These lymphoid follicles are correlated with decreased Mtb bacterial burden in the lungs and also with increased B cells and anti-Mtb cell lysate IgA aand IgG antibodies. Therefore, to further evaluate the protection mechanisms, we have performed spatial transcriptomics o lung tissues.

Now let’s start data analysis

install packages

#remove.packages("Seurat")
#install.packages('remotes')
#remotes::install_version("Seurat", version = "4.0.5")
#BiocManager::install("clusterProfiler")
#install.packages("enrichplot")
#installed.packages("patchwork")
#BiocManager::install("SingleR")
#BiocManager::install("celldex")
#BiocManager::install("ensembldb")
#BiocManager::install('limma')
#BiocManager::install("DESeq2")
#organism = "org.Mm.eg.db"
#BiocManager::install(organism)
#BiocManager::install("pathview")
#install.packages("Seurat")
#install.packages("remotes")
#BiocManager::install("rhdf5")
#install.packages("hdf5r")
#BiocManager::install("NeuCA")
#install.packages('devtools')
#install_github("ggjlab/scMCA")
#install.packages("pheatmap")
#BiocManager::install("scater")
#devtools::install_github("sqjin/CellChat")
#BiocManager::install("ComplexHeatmap")

Load libraries

In this step, we are loading the libraries which are required for analysis and plotting of the data.

library(Seurat)
library(devtools)
library(ggplot2)
library(patchwork)
library(dplyr)
library(clusterProfiler)
library(enrichplot)
# we use ggplot2 to add x axis labels (ex: ridgeplot)
library(stringr)
library(celldex)
library(ensembldb)
library(SingleR)
library(DESeq2)
library(org.Mm.eg.db)
library(rhdf5)
library(hdf5r)
library(pheatmap)
library(ComplexHeatmap)
library(CellChat)
library(patchwork)
options(stringsAsFactors = FALSE)
library(NMF)
library(ggalluvial)

load data for mouse 1 and mouse 2

# Mouse 1
ntm_bcg_mouse_1 <- "/Volumes/Seagate_4TB/spatial_ntm_samples/demultiplexed/ntm_bcg_sample_round_one/outs/"

list.files(ntm_bcg_mouse_1)
##  [1] "analysis"                            
##  [2] "cloupe.cloupe"                       
##  [3] "filtered_feature_bc_matrix_ntm_bcg_1"
##  [4] "filtered_feature_bc_matrix.h5"       
##  [5] "metrics_summary.csv"                 
##  [6] "molecule_info.h5"                    
##  [7] "possorted_genome_bam.bam"            
##  [8] "possorted_genome_bam.bam.bai"        
##  [9] "probe_set.csv"                       
## [10] "raw_feature_bc_matrix"               
## [11] "raw_feature_bc_matrix.h5"            
## [12] "spatial"                             
## [13] "spatial_enrichment.csv"              
## [14] "web_summary.html"
ntm_bcg_mouse_1_data <- Load10X_Spatial(
  ntm_bcg_mouse_1,
  filename = "filtered_feature_bc_matrix.h5",
  assay = "spatial",
  slice = "ntmbcg",
  filter.matrix = TRUE,
  to.upper = FALSE,
  image = NULL)

# Mouse 2
ntm_bcg_mouse_2 <- "/Volumes/Seagate_4TB/spatial_ntm_samples/demultiplexed/ntm_bcg_sample_round_two/outs/"

list.files(ntm_bcg_mouse_2)
##  [1] "analysis"                            
##  [2] "cloupe.cloupe"                       
##  [3] "filtered_feature_bc_matrix_ntm_bcg_2"
##  [4] "filtered_feature_bc_matrix.h5"       
##  [5] "metrics_summary.csv"                 
##  [6] "molecule_info.h5"                    
##  [7] "ntm+bcg_web_summary.html"            
##  [8] "possorted_genome_bam.bam"            
##  [9] "possorted_genome_bam.bam.bai"        
## [10] "probe_set.csv"                       
## [11] "raw_feature_bc_matrix"               
## [12] "raw_feature_bc_matrix.h5"            
## [13] "spatial"                             
## [14] "spatial_enrichment.csv"
ntm_bcg_mouse_2_data <- Load10X_Spatial(
 ntm_bcg_mouse_2,
  filename = "filtered_feature_bc_matrix.h5",
  assay = "spatial",
  slice = "ntmbcg",
  filter.matrix = TRUE,
  to.upper = FALSE,
  image = NULL)

data preprocessing

SCTransform function will NormalizeData, FindVariableFeatures, and ScaleData workflow. Results are saved in a new assay (named SCT by default) with counts being (corrected) counts, and data being log1p(counts).

# Mouse 1
ntm_bcg_mouse_1_data <- SCTransform(ntm_bcg_mouse_1_data, assay = "spatial", verbose = FALSE)

# Mouse 2
ntm_bcg_mouse_2_data <- SCTransform(ntm_bcg_mouse_2_data, assay = "spatial", verbose = FALSE)

Visualization of lymphoid follicle markers

Mouse 1

p1 <- SpatialFeaturePlot(ntm_bcg_mouse_1_data, features = "Cd19", alpha = c(0.1, 1.5), pt.size.factor = 1.8)
p2 <- SpatialFeaturePlot(ntm_bcg_mouse_1_data, features = "Igha", alpha = c(0.1, 1), pt.size.factor = 1.8)
p3 <- SpatialFeaturePlot(ntm_bcg_mouse_1_data, features = "Bcl6", alpha = c(0.1, 1), pt.size.factor = 1.8)
p4 <- SpatialFeaturePlot(ntm_bcg_mouse_1_data, features = "Lta", alpha = c(0.1, 1.5), pt.size.factor = 1.8)
p5 <- SpatialFeaturePlot(ntm_bcg_mouse_1_data, features = "Ltb", alpha = c(0.1, 1.5), pt.size.factor = 1.8)
p6 <- SpatialFeaturePlot(ntm_bcg_mouse_1_data, features = "Cd4", alpha = c(0.1, 1.5), pt.size.factor = 1.8)
p7 <- SpatialFeaturePlot(ntm_bcg_mouse_1_data, features = "Mki67", alpha = c(0.1, 1.5), pt.size.factor = 1.8)
p8 <- SpatialFeaturePlot(ntm_bcg_mouse_1_data, features = "Cxcr5", alpha = c(0.1, 1.5), pt.size.factor = 1.8)

p1 + p2 + p3 + p4 + p5 + p6 + p7 + p8

Mouse 2

p1 <- SpatialFeaturePlot(ntm_bcg_mouse_2_data, features = "Cd19", alpha = c(0.1, 1.5), pt.size.factor = 1.5)
p2 <- SpatialFeaturePlot(ntm_bcg_mouse_2_data, features = "Igha", alpha = c(0.1, 1), pt.size.factor = 1.5)
p3 <- SpatialFeaturePlot(ntm_bcg_mouse_2_data, features = "Bcl6", alpha = c(0.1, 1), pt.size.factor = 1.5)
p4 <- SpatialFeaturePlot(ntm_bcg_mouse_2_data, features = "Lta", alpha = c(0.1, 1.5), pt.size.factor = 1.5)
p5 <- SpatialFeaturePlot(ntm_bcg_mouse_2_data, features = "Ltb", alpha = c(0.1, 1.5), pt.size.factor = 1.5)
p6 <- SpatialFeaturePlot(ntm_bcg_mouse_2_data, features = "Cd4", alpha = c(0.1, 1.5), pt.size.factor = 1.5)
p7 <- SpatialFeaturePlot(ntm_bcg_mouse_2_data, features = "Mki67", alpha = c(0.1, 1.5), pt.size.factor = 1.5)
p8 <- SpatialFeaturePlot(ntm_bcg_mouse_2_data, features = "Cxcr5", alpha = c(0.1, 1.5), pt.size.factor = 1.5)

p1 + p2 + p3 + p4 + p5 + p6 + p7 + p8

dimensionality reduction and clustering

Clustering is the assignment of objects to homogeneous groups (called clusters) while making sure that objects in different groups are not similar. Clustering is considered an unsupervised task as it aims to describe the hidden structure of the objects.

Dimensionality reduction is a process to reduce the number of features under consideration, where each feature is a dimension that partly represents the objects. Why is dimensionality reduction important? As more features are added, the data becomes very sparse and analysis suffers from the curse of dimensionality. Additionally, it is easier to process smaller data sets. Dimensionality reduction can be executed using two different methods:

  1. Selecting from the existing features (feature selection)
  2. Extracting new features by combining the existing features (feature extraction)
# Mouse 1
ntm_bcg_mouse_1_analyzed <- RunPCA(ntm_bcg_mouse_1_data, assay = "SCT", verbose = FALSE)
ntm_bcg_mouse_1_analyzed <- FindNeighbors(ntm_bcg_mouse_1_analyzed, reduction = "pca", dims = 1:5)
ntm_bcg_mouse_1_analyzed <- FindClusters(ntm_bcg_mouse_1_analyzed, verbose = FALSE, resolution = 0.8)
ntm_bcg_mouse_1_analyzed <- RunUMAP(ntm_bcg_mouse_1_analyzed, reduction = "pca", dims = 1:5)

# Mouse 2
ntm_bcg_mouse_2_analyzed <- RunPCA(ntm_bcg_mouse_2_data, assay = "SCT", verbose = FALSE)
ntm_bcg_mouse_2_analyzed <- FindNeighbors(ntm_bcg_mouse_2_analyzed, reduction = "pca", dims = 1:5)
ntm_bcg_mouse_2_analyzed <- FindClusters(ntm_bcg_mouse_2_analyzed, verbose = FALSE, resolution = 0.8)
ntm_bcg_mouse_2_analyzed <- RunUMAP(ntm_bcg_mouse_2_analyzed, reduction = "pca", dims = 1:5)

labelling clusters using SingleR pipeline

# First convert the seurat object to single cell experiment object (otherwise the SingleR pipeline will not work)

# Mouse 1
ntm_bcg_mouse_1_analyzed.sce <- as.SingleCellExperiment(ntm_bcg_mouse_1_analyzed)

# Mouse 2
ntm_bcg_mouse_2_analyzed.sce <- as.SingleCellExperiment(ntm_bcg_mouse_2_analyzed)


#create reference data

ref.data <- celldex::ImmGenData()
ref.data
## class: SummarizedExperiment 
## dim: 22134 830 
## metadata(0):
## assays(1): logcounts
## rownames(22134): Zglp1 Vmn2r65 ... Tiparp Kdm1a
## rowData names(0):
## colnames(830):
##   GSM1136119_EA07068_260297_MOGENE-1_0-ST-V1_MF.11C-11B+.LU_1.CEL
##   GSM1136120_EA07068_260298_MOGENE-1_0-ST-V1_MF.11C-11B+.LU_2.CEL ...
##   GSM920654_EA07068_201214_MOGENE-1_0-ST-V1_TGD.VG4+24ALO.E17.TH_1.CEL
##   GSM920655_EA07068_201215_MOGENE-1_0-ST-V1_TGD.VG4+24ALO.E17.TH_2.CEL
## colData names(3): label.main label.fine label.ont
# run singleR pipeline to find the cell types

## Mouse 1
cell_types_mouse_1 <- SingleR(test=ntm_bcg_mouse_1_analyzed.sce, assay.type.test=1, 
    ref=ref.data, labels=ref.data$label.main)

## Mouse 2
cell_types_mouse_2 <- SingleR(test=ntm_bcg_mouse_2_analyzed.sce, assay.type.test=1, 
    ref=ref.data, labels=ref.data$label.main)

merging metadata of cell types to seurat object

# Mouse 1
ntm_bcg_mouse_1_analyzed[["ref.data"]] <- cell_types_mouse_1$labels

# Mouse 2
ntm_bcg_mouse_2_analyzed[["ref.data"]] <- cell_types_mouse_2$labels

Plot umap and spatial plot to localize each cell type

set the colors of the cell types

my_cols <- c('B cells'='#F8766D','DC'='#39568CFF','Endothelial cells'='#CD9600','Epithelial cells'='#00C19A','Fibroblasts'= "#C77CFF",
    'Macrophages'='#00A9FF','Monocytes'='#0CB702','Stromal cells'='#FF61CC')

Mouse 1

p1 <- DimPlot(ntm_bcg_mouse_1_analyzed, group.by = c("ref.data"), reduction = "umap",label = F, pt.size = 1.5, label.size = 5, cols = my_cols)

p2 <- SpatialDimPlot(ntm_bcg_mouse_1_analyzed, group.by = "ref.data", label = FALSE, label.size = 5, cols = my_cols)

p1 + p2

Mouse 2

p1 <- DimPlot(ntm_bcg_mouse_2_analyzed, group.by = c("ref.data"), reduction = "umap",label = F, pt.size = 1.5, label.size = 5, cols = my_cols)

p2 <- SpatialDimPlot(ntm_bcg_mouse_2_analyzed, group.by = c("ref.data"), label = FALSE, label.size = 5, cols = my_cols)

p1 + p2

Plot violin plot with top markers associated with B cells and lymphoid follicles

Mouse 1

VlnPlot(ntm_bcg_mouse_1_analyzed, group.by = c("ref.data"), features = c("Cxcr5", "Mki67", "Ltb", "Cd38", "Ccr6", "Tnfrsf13c", "Fcrla"), cols =  my_cols)

Mouse 2

VlnPlot(ntm_bcg_mouse_2_analyzed, group.by = c("ref.data"), features = c("Cxcr5", "Mki67", "Ltb", "Cd38", "Ccr6", "Tnfrsf13c", "Fcrla"), cols =  my_cols)

find markers in each cluster

Mouse 1

B_cell_mouse_1 <- FindMarkers(ntm_bcg_mouse_1_analyzed, group.by = "ref.data", ident.1 = "B cells", min.pct = 0.25)
head(B_cell_mouse_1, n = 10)
##                p_val avg_log2FC pct.1 pct.2    p_val_adj
## Spib    1.884647e-23  0.9418102 0.871 0.188 3.071410e-19
## Fcrla   9.367105e-22  0.7372041 0.710 0.127 1.526557e-17
## Ms4a1   2.140325e-20  0.9122884 0.710 0.143 3.488088e-16
## Blk     3.797112e-20  0.5455946 0.452 0.056 6.188154e-16
## Cd19    8.972829e-20  1.4938727 0.903 0.310 1.462302e-15
## Ighd    2.212679e-19  1.0746787 0.968 0.343 3.606004e-15
## Cxcr5   1.248954e-17  0.6716025 0.581 0.105 2.035420e-13
## Cd79b   3.723329e-17  1.2436082 1.000 0.462 6.067909e-13
## Cd52    5.006688e-17  1.1761909 1.000 0.925 8.159399e-13
## H2-DMb2 7.077493e-17  1.3906987 1.000 0.959 1.153419e-12
write.csv(B_cell_mouse_1, "DATA/bcg_ntm/B_cell_mouse_1.csv")

DC_mouse_1 <- FindMarkers(ntm_bcg_mouse_1_analyzed, group.by = "ref.data",  ident.1 = "DC", min.pct = 0.25)
head(DC_mouse_1, n = 10)
##                      p_val avg_log2FC pct.1 pct.2    p_val_adj
## Tmem132e      9.059847e-86  0.3850616 0.308 0.001 1.476483e-81
## Slc1a2        1.817320e-10  0.4710357 0.462 0.054 2.961686e-06
## Timd4         3.649406e-10  0.6152564 0.615 0.096 5.947437e-06
## Sytl4         2.704997e-09  0.4086301 0.385 0.043 4.408334e-05
## Fcho1         3.276719e-09  0.5709341 0.692 0.128 5.340069e-05
## Il12b         4.327837e-09  0.4568340 0.462 0.061 7.053076e-05
## 6030468B19Rik 8.465842e-09  0.3451095 0.308 0.029 1.379678e-04
## Afmid         3.004945e-08  0.4004987 0.385 0.048 4.897159e-04
## Rhoh          4.550822e-08  0.3967611 0.385 0.049 7.416475e-04
## Gpr132        6.617050e-08  0.5965298 0.692 0.149 1.078381e-03
write.csv(DC_mouse_1, "DATA/bcg_ntm/DC_mouse_1.csv")

Stromal_cells_mouse_1 <- FindMarkers(ntm_bcg_mouse_1_analyzed, group.by = "ref.data",  ident.1 = "Stromal cells", min.pct = 0.25)
head(Stromal_cells_mouse_1, n = 10)
##              p_val avg_log2FC pct.1 pct.2    p_val_adj
## Des   4.845871e-19  0.6173361 0.860 0.749 7.897316e-15
## Car3  9.790880e-18  1.3014565 0.530 0.353 1.595620e-13
## Cox8b 4.075760e-16  0.5873284 0.258 0.123 6.642266e-12
## Ttn   2.775457e-15  0.9879534 0.358 0.211 4.523162e-11
## Acta1 2.926203e-15  0.7622334 0.261 0.129 4.768833e-11
## Myh11 4.216275e-15  0.2647998 0.485 0.302 6.871263e-11
## Tpm1  2.389877e-14  0.6391463 1.000 1.000 3.894783e-10
## Myom1 1.335642e-13  0.3498079 0.259 0.133 2.176695e-09
## Fabp4 3.689892e-13  1.2113353 0.515 0.372 6.013417e-09
## Tnnc1 7.578253e-13  0.9014321 0.347 0.214 1.235028e-08
write.csv(Stromal_cells_mouse_1, "DATA/bcg_ntm/Stromal_cells_mouse_1.csv")


Endothelial_cells_mouse_1 <- FindMarkers(ntm_bcg_mouse_1_analyzed, group.by = "ref.data",  ident.1 = "Endothelial cells", min.pct = 0.25)
head(Endothelial_cells_mouse_1, n = 10)
##                p_val avg_log2FC pct.1 pct.2    p_val_adj
## Cldn5   2.907564e-94  0.6746717 0.999 0.945 4.738457e-90
## Igfbp2  1.828819e-82  0.6540357 1.000 0.948 2.980426e-78
## Acer2   8.520134e-81  0.5753817 0.997 0.947 1.388526e-76
## Tns1    1.568608e-78  0.5137977 1.000 0.991 2.556360e-74
## C3      1.460961e-77 -0.7289525 0.997 0.998 2.380928e-73
## Pakap.1 1.418332e-76  0.4983050 0.999 0.984 2.311456e-72
## C1qa    8.764174e-74 -0.7462537 0.904 0.973 1.428297e-69
## Egfl7   4.863529e-73  0.5618153 1.000 0.912 7.926094e-69
## Inmt    5.234591e-73  0.5960425 0.997 0.943 8.530813e-69
## Epas1   6.429208e-73  0.5337445 1.000 0.966 1.047768e-68
write.csv(Endothelial_cells_mouse_1, "DATA/bcg_ntm/Endothelial_cells_mouse_1.csv")

Epithelial_cell_mouse_1 <- FindMarkers(ntm_bcg_mouse_1_analyzed, group.by = "ref.data",  ident.1 = "Epithelial cells", min.pct = 0.25)
head(Epithelial_cell_mouse_1, n = 10)
##                p_val avg_log2FC pct.1 pct.2    p_val_adj
## Cfap99  2.122884e-40  0.3433547 0.273 0.003 3.459665e-36
## Eno4    2.449205e-28  0.5165823 0.455 0.015 3.991469e-24
## Spef2   6.332139e-27  0.7402837 0.636 0.033 1.031949e-22
## Dnah6   3.985786e-26  0.6749811 0.545 0.025 6.495636e-22
## Efcab1  1.411071e-24  0.5140130 0.455 0.018 2.299623e-20
## Ccdc121 1.582202e-22  0.5223802 0.364 0.013 2.578515e-18
## Alox12e 1.497807e-21  0.5829836 0.545 0.030 2.440976e-17
## Ggt6    2.925302e-21  0.5095276 0.455 0.021 4.767365e-17
## Fam81b  3.546960e-21  0.7434482 0.545 0.032 5.780480e-17
## Dpp6    1.736230e-20  0.4273356 0.364 0.014 2.829533e-16
write.csv(Epithelial_cell_mouse_1, "DATA/bcg_ntm/Epithelial_cell_mouse_1.csv")

Fibroblasts_mouse_1 <- FindMarkers(ntm_bcg_mouse_1_analyzed, group.by = "ref.data",  ident.1 = "Fibroblasts", min.pct = 0.25)
head(Fibroblasts_mouse_1, n = 10)
##               p_val avg_log2FC pct.1 pct.2    p_val_adj
## Gsn    2.129296e-08  0.2501452 0.988 0.959 0.0003470114
## Clec3b 1.243076e-06  0.3409435 0.958 0.922 0.0202584109
## Col3a1 3.726317e-06  0.2717886 0.994 0.989 0.0607277882
## Ighg2b 3.038943e-04 -0.3535009 0.427 0.522 1.0000000000
## Iglc1  3.444621e-04 -0.3950479 0.615 0.662 1.0000000000
## Cbr2   3.688341e-04  0.3383723 0.885 0.821 1.0000000000
## Ighj1  3.315223e-03 -0.3933206 0.939 0.951 1.0000000000
## Slpi   3.407754e-03 -0.3098203 0.939 0.960 1.0000000000
## Jchain 7.673227e-03 -0.3719164 0.991 0.981 1.0000000000
## Cyp2f2 1.836577e-02  0.3969943 0.997 0.980 1.0000000000
  write.csv(Fibroblasts_mouse_1, "DATA/bcg_ntm/Fibroblasts_mouse_1.csv")
  
Macrophages_mouse_1 <- FindMarkers(ntm_bcg_mouse_1_analyzed, group.by = "ref.data",  ident.1 = "Macrophages", min.pct = 0.25)
  head(Macrophages_mouse_1, n = 10)
##                 p_val avg_log2FC pct.1 pct.2     p_val_adj
## Inmt    5.947621e-120 -1.5346909 0.851 0.988 9.692838e-116
## Igfbp2  9.558494e-111 -1.4883978 0.881 0.988 1.557748e-106
## Tns1    6.016755e-110 -1.0762786 0.978 0.998 9.805506e-106
## Cldn5   1.427166e-108 -1.3438779 0.873 0.987 2.325852e-104
## Epas1   1.179950e-104 -1.1411513 0.923 0.991 1.922965e-100
## Cd74     5.804052e-99  0.4779757 1.000 1.000  9.458864e-95
## Pcolce2  1.269977e-98 -1.2735655 0.624 0.936  2.069682e-94
## Fmo2     2.253327e-98 -1.1674608 0.851 0.977  3.672247e-94
## C3       6.998009e-95  0.8316677 0.997 0.997  1.140465e-90
## Acer2    2.881571e-92 -1.0832747 0.892 0.983  4.696097e-88
write.csv(Macrophages_mouse_1, "DATA/bcg_ntm/Macrophages_mouse_1.csv")

Mouse 2

B_cell_mouse_2 <- FindMarkers(ntm_bcg_mouse_2_analyzed, group.by = "ref.data", ident.1 = "B cells", min.pct = 0.25)
head(B_cell_mouse_2, n = 10)
##                   p_val avg_log2FC pct.1 pct.2     p_val_adj
## Cr2       8.111841e-111  1.3335168 0.781 0.140 1.147339e-106
## Tnfrsf13c 6.610158e-109  1.5062338 0.870 0.215 9.349407e-105
## Cd19      1.798695e-100  1.5825104 0.932 0.293  2.544075e-96
## Cd79a     6.882185e-100  2.0061965 0.995 0.655  9.734163e-96
## Cxcr5      5.656108e-98  1.0245163 0.719 0.128  8.000000e-94
## Fcrla      9.070035e-93  0.9888835 0.797 0.175  1.282866e-88
## Cd79b      1.744343e-92  1.8567577 0.969 0.428  2.467198e-88
## Ighd       6.213102e-91  1.4257687 0.870 0.252  8.787811e-87
## Spib       7.115527e-90  1.3790276 0.906 0.291  1.006420e-85
## Fcrl5      9.235320e-89  0.9367736 0.615 0.097  1.306244e-84
write.csv(B_cell_mouse_2, "DATA/bcg_ntm/B_cell_mouse_2.csv")

DC_mouse_2 <- FindMarkers(ntm_bcg_mouse_2_analyzed, group.by = "ref.data",  ident.1 = "DC", min.pct = 0.25)
head(DC_mouse_2, n = 10)
##              p_val avg_log2FC pct.1 pct.2    p_val_adj
## Fscn1 5.048929e-20  1.0065368 0.903 0.528 7.141206e-16
## Ager  2.407386e-18 -0.9210121 0.860 0.969 3.405007e-14
## Il12b 3.123756e-18  0.3339538 0.290 0.057 4.418241e-14
## Adam8 1.850423e-17  0.5687241 0.774 0.354 2.617239e-13
## Vegfa 1.453622e-16 -0.9902210 0.656 0.908 2.056003e-12
## Itk   1.617670e-16  0.5327627 0.763 0.354 2.288033e-12
## Trbc2 2.673189e-16  0.7156530 0.925 0.739 3.780958e-12
## Sftpc 5.873863e-16 -0.7422910 1.000 1.000 8.307992e-12
## Timp3 6.005500e-16 -0.9851887 0.828 0.947 8.494179e-12
## Tns1  9.956396e-16 -1.0641873 0.753 0.904 1.408233e-11
write.csv(DC_mouse_2, "DATA/bcg_ntm/DC_mouse_2.csv")

Stromal_cells_mouse_2 <- FindMarkers(ntm_bcg_mouse_2_analyzed, group.by = "ref.data",  ident.1 = "Stromal cells", min.pct = 0.25)
head(Stromal_cells_mouse_2, n = 10)
##                 p_val avg_log2FC pct.1 pct.2    p_val_adj
## Eln      1.044974e-17  1.0589486 0.907 0.642 1.478012e-13
## Myh11    1.458198e-15  1.0955090 0.573 0.222 2.062476e-11
## Gsn      7.872851e-15  0.7161536 1.000 0.872 1.113536e-10
## Ltbp4    1.257061e-14  0.9702758 0.867 0.635 1.777987e-10
## Fmo2     2.340566e-13  0.7548595 0.973 0.737 3.310496e-09
## Myl9     5.182858e-13  0.7656431 0.480 0.177 7.330634e-09
## Clec3b   1.571603e-12  0.9195858 0.893 0.706 2.222876e-08
## Cst3     3.865944e-12  0.4678424 1.000 1.000 5.467991e-08
## Ppp1r14a 6.776818e-12  0.6279264 0.947 0.737 9.585131e-08
## Inmt     1.781123e-11  0.7479552 1.000 0.655 2.519221e-07
write.csv(Stromal_cells_mouse_2, "DATA/bcg_ntm/Stromal_cells_mouse_2.csv")


Endothelial_cells_mouse_2 <- FindMarkers(ntm_bcg_mouse_2_analyzed, group.by = "ref.data",  ident.1 = "Endothelial cells", min.pct = 0.25)
head(Endothelial_cells_mouse_2, n = 10)
##                 p_val avg_log2FC pct.1 pct.2     p_val_adj
## Igfbp2  1.277601e-126  1.4188105 0.953 0.598 1.807039e-122
## Cldn5   2.129545e-124  1.2155326 0.981 0.678 3.012029e-120
## Epas1   1.746221e-115  1.1141478 0.991 0.841 2.469855e-111
## Tns1    1.282172e-112  0.9788669 0.998 0.854 1.813504e-108
## Inmt    6.138493e-109  1.1670392 0.941 0.553 8.682285e-105
## Pakap.1  2.127892e-97  0.8698975 0.988 0.868  3.009690e-93
## Ager     5.857310e-95  0.7197373 1.000 0.949  8.284579e-91
## Vegfa    1.347840e-93  0.8909449 0.993 0.855  1.906384e-89
## Ace      2.240201e-93  0.9738672 0.920 0.605  3.168540e-89
## Spock2   4.833370e-91  0.9284602 0.913 0.549  6.836318e-87
write.csv(Endothelial_cells_mouse_2, "DATA/bcg_ntm/Endothelial_cells_mouse_2.csv")

Epithelial_cell_mouse_2 <- FindMarkers(ntm_bcg_mouse_2_analyzed, group.by = "ref.data",  ident.1 = "Epithelial cells", min.pct = 0.25)
head(Epithelial_cell_mouse_2, n = 10)
##                p_val avg_log2FC pct.1 pct.2    p_val_adj
## Inmt    3.272768e-19 -1.6073218  0.27 0.690 4.629003e-15
## Igfbp2  7.948094e-18 -1.6933439  0.36 0.722 1.124178e-13
## Ckmt1   1.320948e-16  0.3963712  0.45 0.142 1.868349e-12
## Lcn2    1.411799e-16  0.8342200  0.96 0.842 1.996848e-12
## Fxyd3   2.430392e-16  0.3067940  0.27 0.057 3.437547e-12
## Cldn3   5.101704e-16  0.6185109  0.99 0.862 7.215849e-12
## Spint2  6.448865e-16  0.5746184  1.00 0.990 9.121275e-12
## Anxa8   2.003781e-15  0.4762841  0.37 0.108 2.834148e-11
## Tnfaip2 3.153039e-15  0.6345622  1.00 0.998 4.459659e-11
## Acer2   8.382696e-15 -0.9449319  0.68 0.850 1.185648e-10
write.csv(Epithelial_cell_mouse_2, "DATA/bcg_ntm/Epithelial_cell_mouse_2.csv")

Fibroblasts_mouse_2 <- FindMarkers(ntm_bcg_mouse_2_analyzed, group.by = "ref.data",  ident.1 = "Fibroblasts", min.pct = 0.25)
head(Fibroblasts_mouse_2, n = 10)
##                p_val avg_log2FC pct.1 pct.2    p_val_adj
## Gsn     1.952403e-18  0.7394821 0.977 0.870 2.761479e-14
## Clec3b  5.949273e-16  0.7269997 0.870 0.702 8.414652e-12
## Cst3    6.636390e-15  0.4288308 1.000 1.000 9.386509e-11
## Ltbp4   1.739443e-14  0.5883409 0.863 0.628 2.460268e-10
## Selenop 1.122721e-13  0.4353445 0.992 0.943 1.587976e-09
## H2-DMb2 1.395579e-13 -0.5924872 0.985 0.999 1.973907e-09
## Gimap7  3.606865e-13 -0.6385908 0.198 0.536 5.101550e-09
## Ctsz    2.339172e-12 -0.4897233 0.985 0.990 3.308525e-08
## Inmt    7.760718e-12  0.5850922 0.931 0.649 1.097676e-07
## Tnfaip2 6.435971e-11 -0.5288026 1.000 0.998 9.103038e-07
  write.csv(Fibroblasts_mouse_2, "DATA/bcg_ntm/Fibroblasts_mouse_2.csv")
  
Macrophages_mouse_2 <- FindMarkers(ntm_bcg_mouse_2_analyzed, group.by = "ref.data",  ident.1 = "Macrophages", min.pct = 0.25)
  head(Macrophages_mouse_2, n = 10)
##                p_val avg_log2FC pct.1 pct.2    p_val_adj
## Ctss    9.054290e-69  0.6053018 1.000 1.000 1.280639e-64
## Gbp2b   3.885999e-58  0.5941220 0.999 0.989 5.496357e-54
## Nos2    6.201747e-55  0.8442604 0.846 0.671 8.771750e-51
## Lgals3  6.395388e-47  0.5912143 0.997 0.976 9.045637e-43
## Fth1    6.493402e-47  0.3267162 1.000 1.000 9.184268e-43
## Bst2    1.449126e-46  0.4767256 0.990 0.981 2.049645e-42
## Psap    1.140185e-45  0.4178347 1.000 1.000 1.612678e-41
## Tnfaip2 3.070388e-44  0.5213674 0.999 0.997 4.342756e-40
## Ctsb    2.057596e-42  0.5245380 0.996 0.993 2.910263e-38
## Cldn18  2.526096e-38  0.4332956 0.995 0.954 3.572910e-34
write.csv(Macrophages_mouse_2, "DATA/bcg_ntm/Macrophages_mouse_2.csv")
B_cell_deseq_1 <- FindMarkers(ntm_bcg_mouse_1_analyzed, group.by = "ref.data",  ident.1 = "B cells", test.use = "DESeq2")
write.csv(B_cell_deseq_1, "DATA/bcg_ntm/B_cell_deseq_mouse1.csv")

B_cell_deseq_2 <- FindMarkers(ntm_bcg_mouse_2_analyzed, group.by = "ref.data",  ident.1 = "B cells", test.use = "DESeq2")
write.csv(B_cell_deseq_2, "DATA/bcg_ntm/B_cell_deseq_mouse2.csv")

Gene set enrichment

organism = org.Mm.eg.db

prepare input data for gene set enrichment

# reading in data from deseq2
df = read.csv("DATA/bcg_ntm/B_cell_deseq_mouse1.csv", header=TRUE)

# we want the log2 fold change 
original_gene_list <- df$avg_log2FC

# name the vector
names(original_gene_list) <- df$X

# omit any NA values 
gene_list<-na.omit(original_gene_list)

# sort the list in decreasing order (required for clusterProfiler)
gene_list = sort(gene_list, decreasing = TRUE)

Gene set enrichment

#gene set enrichment function for B cell cluster

gse <- gseGO(geneList=gene_list, 
             ont ="ALL", 
             keyType = "SYMBOL", 
             minGSSize = 3, 
             maxGSSize = 800, 
             pvalueCutoff = 0.05, 
             verbose = TRUE, 
             OrgDb = organism, 
             pAdjustMethod = "none",
             eps = 0.0,
             nPermSimple = 10000)

dotplot(gse, showCategory=10, split=".sign") + facet_grid(.~.sign)

KEGG pathway analysis

# Convert gene IDs for gse to KEGG function
# We will lose some genes here because not all IDs will be converted

ids<-bitr(names(gene_list), fromType = "SYMBOL", toType = "ENTREZID", OrgDb=organism)
 
# remove duplicate IDS (here I use "ENSEMBL", but it should be whatever was selected as keyType)
dedup_ids = ids[!duplicated(ids[c("SYMBOL")]),]

# Create a new dataframe df2 which has only the genes which were successfully mapped using the bitr function above
df2 = df[df$X %in% dedup_ids$SYMBOL,]

# Create a new column in df2 with the corresponding ENTREZ IDs
df2$Y = dedup_ids$ENTREZID

# Create a vector of the gene unuiverse
kegg_gene_list <- df2$avg_log2FC

# Name vector with ENTREZ ids
names(kegg_gene_list) <- df2$Y

# omit any NA values 
kegg_gene_list<-na.omit(kegg_gene_list)

# sort the list in decreasing order (required for clusterProfiler)
kegg_gene_list = sort(kegg_gene_list, decreasing = TRUE)

running kegg

kegg_organism = "mmu"
kk2 <- gseKEGG(geneList     = kegg_gene_list,
               organism     = kegg_organism,
               minGSSize    = 3,
               maxGSSize    = 800,
               pvalueCutoff = 0.05,
               pAdjustMethod = "none",
               keyType       = "ncbi-geneid",
               eps = 0.0)

write.csv(kk2, "DATA/bcg_ntm/kk2.csv")

dotplot(kk2, showCategory=, split=".sign") + facet_grid(.~.sign)

library(pathview)

# Produce the native KEGG plot (PNG) for first set of genes
dme <- pathview(gene.data=kegg_gene_list, pathway.id="mmu05417", species = kegg_organism, low = list(gene = "#453781FF"), mid =
list(gene = "#20A387FF"), high = list(gene = "#FDE725FF"))

# Produce a different plot (PDF) (not displayed here)
dme <- pathview(gene.data=kegg_gene_list, pathway.id="mmu05417", species = kegg_organism, kegg.native = F, low = list(gene = "#453781FF"), mid =
list(gene = "#20A387FF"), high = list(gene = "#FDE725FF"))